Goal/Purpose of operations:
The DepMap/PRISM primary and secondary pooled drug screens were used to help evaluate whether a drug candidate was suitable (if that drug had been tested). The primary screen calculated, for each cell line treated with a drug, the median across replicates of the log fold change in median fluorescence intensity. The PRISM study considered a cell line sensitive to a treatment if the median-collapsed fold-change was less than 0.3. The secondary screen calculated the area under the curve (AUC) of the dose-response curve fitted with a 4-parameter logistic model. While the PRISM study did not provide a cut-off for sensitivity, lower AUC values were considered more sensitive, and we compared the AUC values of a drug candidate to the AUC of temozolomide, the standard treatment for GBM (~0.90).
Finished pseudocode on:
220920
System which operations were done on:
my laptop
GitHub Repo:
Transfer_Learning_R03
Docker:
rstudio_cancer_dr
Directory of operations:
/home - docker
Scripts being edited for operations:
NA
Data being used:
DESeq2 and limma results
Papers and tools:
NA
library(ggplot2)
library(cowplot)
library(ggpubr)
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:cowplot':
##
## get_legend
library(recount3)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library(SummarizedExperiment)
library(viridis)
## Loading required package: viridisLite
source("/home/rstudio/script/functions_cancer_signature_reversion_JLF.R")
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ✔ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count() masks matrixStats::count()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks S4Vectors::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ BiocGenerics::Position() masks ggplot2::Position(), base::Position()
## ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ dplyr::slice() masks IRanges::slice()
# DESeq2 results (GBM vs. normal)
deseq_results <- read.csv("~/output/deseq2_gbm/220927_deseq2_gbm_normal_gtx_res.csv", sep = ",")
feature_info <- readRDS("~/data/recount3/recount3_fix_download/feature_info.rds")
# Gene symbols are attached by row position below, so stop immediately if the
# gene order of the results table and feature_info ever disagree (previously
# the comparison was only printed and a FALSE would have gone unnoticed).
stopifnot(identical(feature_info$gene_id, rownames(deseq_results)))
deseq_results$Symbol <- feature_info$gene_name
#limma results
#GBM_GTEX_gene_limma_res <- readRDS("~/output/limma_gbm/220421_GBM_GTEX_gene_limma_res.rds")
#GBM_GTEX_gene_limma <- GBM_GTEX_gene_limma_res$limma
#rds not in github using tsv instead
GBM_GTEX_gene_limma <- read_tsv("~/output/limma_gbm/220421_limma_TCGA_GTEX_T_vs_NT_limma_results.tsv")
## Rows: 63856 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): LV
## dbl (6): logFC, AveExpr, t, P.Value, adj.P.Val, B
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#disease signatures from all the methods
gbm_tfl_input <- readRDS("~/output/TF_L_GBM/22004_SR_gene_list_gbm_tfl.rds")
gbm_deseq2_input<- readRDS("~/output/deseq2_gbm/SR_gene_list_gbm_deseq2.rds")
gbm_limma_input<- readRDS("~/output/limma_gbm/SR_gene_list_gbm_limma.rds")
# liver (LIHC)
# DESeq2 results
LIHC_deseq_results <- read.csv("~/output/liver_cancer/deseq2_res/220929_deseq2_lihc_normal_gtx_res.csv", sep=",")
# NOTE(review): symbols are attached by row position; unlike the GBM table,
# the row order is not compared against feature_info$gene_id here — confirm
# that this table has the same gene order as feature_info.
LIHC_deseq_results$Symbol<- feature_info$gene_name
#limma
#LIHC_GTEX_gene_limma_res <- readRDS("~/output/liver_cancer/limma_res/220808_LIHC_GTEX_gene_limma_res.rds")
#LIHC_GTEX_gene_limma <- LIHC_GTEX_gene_limma_res$limma
LIHC_GTEX_gene_limma<- read_tsv("~/output/liver_cancer/limma_res/220808_limma_TCGA_GTEX_T_vs_NT_liver_cancer_limma_results.tsv")
## Rows: 63856 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): LV
## dbl (6): logFC, AveExpr, t, P.Value, adj.P.Val, B
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#disease signatures
liver_tfl_input <- readRDS("~/output/liver_cancer/TFL_res/SR_gene_list_liver_tfl.rds")
liver_deseq2_input <- readRDS("~/output/liver_cancer/deseq2_res/SR_gene_list_liver_deseq2.rds")
liver_limma_input <- readRDS("~/output/liver_cancer/limma_res/SR_gene_list_liver_limma.rds")
# lung (LUAD)
# DESeq2 results
lung_deseq_results <- read.csv("~/output/lung_cancer/deseq2_res/220929_deseq2_luad_normal_gtx_res.csv", sep=",")
# NOTE(review): symbols are attached by row position; unlike the GBM table,
# the row order is not compared against feature_info$gene_id here — confirm
# that this table has the same gene order as feature_info.
lung_deseq_results$Symbol<- feature_info$gene_name
#limma
#lung_GTEX_gene_limma_res <- readRDS("~/output/lung_cancer/limma_res/220808_LUAD_GTEX_gene_limma_res.rds")
#lung_GTEX_gene_limma <- lung_GTEX_gene_limma_res$limma
lung_GTEX_gene_limma<- read_tsv("~/output/lung_cancer/limma_res/220808_limma_TCGA_GTEX_T_vs_NT_lung_cancer_limma_results.tsv")
## Rows: 63856 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): LV
## dbl (6): logFC, AveExpr, t, P.Value, adj.P.Val, B
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# disease signatures (lung) from each method
lung_deseq2_input <- readRDS("~/output/lung_cancer/deseq2_res/SR_gene_list_luad_deseq2.rds")
# NOTE(review): the TFL and limma signatures below are read from deseq2_res/,
# whereas the liver and pancreatic signatures are read from TFL_res/ and
# limma_res/ — confirm these paths point at the intended files.
lung_tfl_input<- readRDS("~/output/lung_cancer/deseq2_res/SR_gene_list_lung_tfl.rds")
lung_limma_input<- readRDS("~/output/lung_cancer/deseq2_res/SR_gene_list_lung_limma.rds")
# pancreatic cancer (PAAD)
# DESeq2 results
paad_deseq_results <-read.csv("~/output/pancreas_cancer/deseq2_res/Deseq2_paad_normal_gtx_res.csv", sep= ",")
# NOTE(review): symbols are attached by row position; unlike the GBM table,
# the row order is not compared against feature_info$gene_id here — confirm
# that this table has the same gene order as feature_info.
paad_deseq_results$Symbol<- feature_info$gene_name
#limma
#paad_GTEX_gene_limma_res <- readRDS("~/output/pancreas_cancer/limma_res/220808_PAAD_GTEX_gene_limma_res.rds")
#paad_GTEX_gene_limma <- paad_GTEX_gene_limma_res$limma
paad_GTEX_gene_limma<- read_tsv("~/output/pancreas_cancer/limma_res/220808_limma_TCGA_GTEX_T_vs_NT_pancreas_cancer_limma_results.tsv")
## Rows: 63856 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): LV
## dbl (6): logFC, AveExpr, t, P.Value, adj.P.Val, B
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#disease signatures
paad_tfl_input <- readRDS("~/output/pancreas_cancer/TFL_res/SR_gene_list_paad_tfl.rds")
paad_limma_input <- readRDS("~/output/pancreas_cancer/limma_res/SR_gene_list_paad_limma.rds")
paad_deseq2_input <- readRDS("~/output/pancreas_cancer/deseq2_res/SR_gene_list_paad_deseq2.rds")
need to create the gene venn diagram for liver cancer
compare gene list across the different methods
# Compare the liver signature genes across limma, DESeq2 and transfer learning:
# one call each for the up genes, the down genes, and both sets combined.
venn_dia_methods(
  liver_limma_input$up,
  liver_deseq2_input$up,
  liver_tfl_input$up,
  file_name = "~/output/liver_cancer/SR_up_genes_venn_diagramm.png"
)
venn_dia_methods(
  liver_limma_input$down,
  liver_deseq2_input$down,
  liver_tfl_input$down,
  file_name = "~/output/liver_cancer/SR_down_genes_venn_diagramm.png"
)
venn_dia_methods(
  unlist(liver_limma_input),
  unlist(liver_deseq2_input),
  unlist(liver_tfl_input),
  file_name = "~/output/liver_cancer/SR_all_genes_venn_diagramm.png"
)
GBM
#gbm_deseq2
deseq_gbm_filter <- volcano_plots(deseq_results, "DESeq2", gbm_deseq2_input, gbm_tfl_input)
## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length
## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length
table(deseq_gbm_filter$padj[deseq_gbm_filter$groups == "Transfer Learning"] < 0.05)
##
## FALSE TRUE
## 32 71
limma_gbm_filter<- volcano_plots(GBM_GTEX_gene_limma, "limma", gbm_limma_input, gbm_tfl_input)
## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length
## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length
table(limma_gbm_filter$padj[limma_gbm_filter$groups == "Transfer Learning"] < 0.05)
##
## FALSE TRUE
## 42 51
liver
deseq_liver_filter<- volcano_plots(LIHC_deseq_results, "DESeq2", liver_deseq2_input, liver_tfl_input)
## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length
## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length
table(deseq_liver_filter$padj[deseq_liver_filter$groups == "Transfer Learning"] < 0.05)
##
## FALSE TRUE
## 14 74
limma_liver_filter<- volcano_plots(LIHC_GTEX_gene_limma, "limma", liver_limma_input, liver_tfl_input)
## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length
## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length
table(limma_liver_filter$padj[limma_liver_filter$groups == "Transfer Learning"] < 0.05)
##
## FALSE TRUE
## 4 79
Lung
deseq_lung_filter<- volcano_plots(lung_deseq_results, "DESeq2", lung_deseq2_input, lung_tfl_input)
## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length
## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length
table(deseq_lung_filter$padj[deseq_lung_filter$groups == "Transfer Learning"] < 0.05)
##
## FALSE TRUE
## 11 105
limma_lung_filter<- volcano_plots(lung_GTEX_gene_limma, "limma", lung_limma_input, lung_tfl_input)
## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length
## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length
table(limma_lung_filter$padj[limma_lung_filter$groups == "Transfer Learning"] < 0.05)
##
## FALSE TRUE
## 11 91
# pancreatic adenocarcinoma (PAAD): DESeq2 results with the PAAD DESeq2 and
# transfer-learning signatures
deseq_paad_filter<- volcano_plots(paad_deseq_results, "DESeq2", paad_deseq2_input, paad_tfl_input)
## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length
## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length
table(deseq_paad_filter$padj[deseq_paad_filter$groups == "Transfer Learning"] < 0.05)
##
## FALSE TRUE
## 83 30
limma_paad_filter<- volcano_plots(paad_GTEX_gene_limma, "limma", paad_limma_input, paad_tfl_input)
## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length
## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length
table(limma_paad_filter$padj[limma_paad_filter$groups == "Transfer Learning"] < 0.05)
##
## FALSE TRUE
## 65 42
GBM
compare_deseq2_limma(deseq_results, GBM_GTEX_gene_limma)
## Warning: Removed 7 rows containing non-finite values (stat_bin2d).
## Warning: Removed 7 rows containing non-finite values (stat_cor).
liver
compare_deseq2_limma(LIHC_deseq_results, LIHC_GTEX_gene_limma, x_p1=2, y_p1=20, x_p2=100, y_p2=400)
compare_deseq2_limma(lung_deseq_results,lung_GTEX_gene_limma, x_p1=-5, y_p1=20, x_p2=100, y_p2=400)
compare_deseq2_limma(paad_deseq_results,paad_GTEX_gene_limma, x_p1=-5, y_p1=20, x_p2=25, y_p2=50)
# Summary bar plot of the DESeq2-vs-limma Spearman coefficients, one panel per
# cancer and one bar per metric (logFC and adjusted p-value).
# NOTE(review): the coefficients are hard-coded — presumably transcribed from
# the compare_deseq2_limma() plots; verify after any re-run.
cancer_labels <- c(
  "glioblastoma",
  "liver hepatocellular \ncarcinoma",
  "lung \nadenocarcinoma",
  " pancreatic \nadenocarcinoma"
)
cancers <- factor(cancer_labels, levels = cancer_labels)
deseq2_limma_sum <- data.frame(
  # the four cancers are listed once per metric
  cancers = rep(cancers, times = 2),
  spearman = c(0.94, 0.69, 0.81, 0.81, 0.72, 0.55, 0.61, 0.55),
  Metric = rep(c("logFC", "Adj. p-value"), each = 4)
)
ggplot(deseq2_limma_sum, aes(x = Metric, y = spearman, fill = Metric)) +
  geom_bar(stat = "identity", position = position_dodge(), color = "black") +
  ylab("Spearman Coefficient") +
  facet_wrap(~ cancers, ncol = 4) +
  xlab("Metric") +
  scale_fill_viridis_d(option = "E") +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    text = element_text(size = 35, face = "bold")
  )
library(readr)
deseq_results_all <- read_csv("~/output/deseq2_gbm/220928_SR_LINCS_GBM_DESEQ2_RES.csv")
## New names:
## Rows: 101544 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): pert, PCID, cell, type, trend, t_gn_sym dbl (8): ...1, WTCS, WTCS_Pval,
## WTCS_FDR, NCS, NCSct, N_upset, N_downset
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
#correct file based on limma gbm script
limma_results_all <- read_csv("~/output/limma_gbm/220421_SR_LINCS_GBM_LIMMA_RES.csv")
## New names:
## Rows: 101544 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): pert, PCID, cell, type, trend, t_gn_sym dbl (8): ...1, WTCS, WTCS_Pval,
## WTCS_FDR, NCS, NCSct, N_upset, N_downset
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
tfl_results_all <- read_csv("~/output/TF_L_GBM/220808_SR_LINCS_GBM_TRL_RES.csv")
## New names:
## • `` -> `...1`
## Warning: One or more parsing issues, see `problems()` for details
## Rows: 101544 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): pert, cell, type, trend, t_gn_sym
## dbl (9): ...1, PCID, WTCS, WTCS_Pval, WTCS_FDR, NCS, NCSct, N_upset, N_downset
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
lincs_method_comparsion(deseq_results_all,limma_results_all , tfl_results_all, cell_line= "GI1")
#liver
deseq_results_all_liver <- read_csv("~/output/liver_cancer/deseq2_res/220929_SR_LINCS_LIHC_DESEQ2_RES.csv")
## New names:
## Rows: 101544 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): pert, PCID, cell, type, trend, t_gn_sym dbl (8): ...1, WTCS, WTCS_Pval,
## WTCS_FDR, NCS, NCSct, N_upset, N_downset
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
limma_results_all_liver<- read_csv("~/output/liver_cancer/220808_SR_LINCS_LIVER_LIMMA_RES.csv")
## New names:
## Rows: 101544 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): pert, PCID, cell, type, trend, t_gn_sym dbl (8): ...1, WTCS, WTCS_Pval,
## WTCS_FDR, NCS, NCSct, N_upset, N_downset
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
tfl_results_all_liver<- read_csv("~/output/liver_cancer/TFL_res/220808_SR_LINCS_LIVER_TFL_RES.csv")
## New names:
## Rows: 101544 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): pert, PCID, cell, type, trend, t_gn_sym dbl (8): ...1, WTCS, WTCS_Pval,
## WTCS_FDR, NCS, NCSct, N_upset, N_downset
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
lincs_method_comparsion(deseq_results_all_liver,limma_results_all_liver , tfl_results_all_liver, cell_line= "HEPG2")
#lung
deseq_results_all_lung <- read_csv("~/output/lung_cancer/deseq2_res/220929_SR_LINCS_LUAD_DESEQ2_RES.csv")
## New names:
## Rows: 101544 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): pert, PCID, cell, type, trend, t_gn_sym dbl (8): ...1, WTCS, WTCS_Pval,
## WTCS_FDR, NCS, NCSct, N_upset, N_downset
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
limma_results_all_lung <- read_csv("~/output/lung_cancer/limma_res/220808_SR_LINCS_LUNG_LIMMA_RES.csv")
## New names:
## Rows: 101544 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): pert, PCID, cell, type, trend, t_gn_sym dbl (8): ...1, WTCS, WTCS_Pval,
## WTCS_FDR, NCS, NCSct, N_upset, N_downset
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
tfl_results_all_lung<- read_csv("~/output/lung_cancer/220601_SR_LINCS_LUNG_TFL_RES.csv")
## New names:
## Rows: 101544 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): pert, PCID, cell, type, trend, t_gn_sym dbl (8): ...1, WTCS, WTCS_Pval,
## WTCS_FDR, NCS, NCSct, N_upset, N_downset
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Lung adenocarcinoma comparison. The cell line was given as "A529", which
# looks like a typo for the lung adenocarcinoma line A549.
# NOTE(review): how lincs_method_comparsion() uses cell_line is not visible
# here — confirm the reported lung coefficients after re-running.
lincs_method_comparsion(deseq_results_all_lung, limma_results_all_lung, tfl_results_all_lung, cell_line = "A549")
deseq_results_all_PAAD <- read_csv("~/output/pancreas_cancer/deseq2_res/220929_SR_LINCS_PAAD_DESEQ2_RES.csv")
## New names:
## Rows: 101544 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): pert, PCID, cell, type, trend, t_gn_sym dbl (8): ...1, WTCS, WTCS_Pval,
## WTCS_FDR, NCS, NCSct, N_upset, N_downset
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
limma_results_all_PAAD <- read_csv( "~/output/pancreas_cancer/limma_res/220808_SR_LINCS_PANCREAS_LIMMA_RES.csv")
## New names:
## Rows: 101544 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): pert, PCID, cell, type, trend, t_gn_sym dbl (8): ...1, WTCS, WTCS_Pval,
## WTCS_FDR, NCS, NCSct, N_upset, N_downset
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
tfl_results_all_PAAD<- read_csv( "~/output/pancreas_cancer/220808_SR_LINCS_PANCREAS_TFL_RES.csv")
## New names:
## Rows: 101544 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): pert, PCID, cell, type, trend, t_gn_sym dbl (8): ...1, WTCS, WTCS_Pval,
## WTCS_FDR, NCS, NCSct, N_upset, N_downset
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
lincs_method_comparsion(deseq_results_all_PAAD ,limma_results_all_PAAD , tfl_results_all_PAAD, cell_line= "YAPC")
#plot all of them together
NCS_sp<- c(0.59, 0.31, 0.38, 0.18, 0.036, 0.4, 0.023, -0.027, 0.27, 0.34, 0.28, 0.36)
FDR_sp <- c(0.37, 0.15, 0.13, -0.012, -0.048, 0.18, 0.069, -0.12, -0.032, 0.12, 0.16, 0.23)
NCS_test<- c(T, T,T, T, F,T,F,F,T,T,T,T)
FDR_test<- c(T,T,T,F,F,T,F,F,F,T,T,T)
#c("glioblastoma", "liver hepatocellular carcinoma", "lung adenocarcinoma", " pancreatic adenocarcinoma")
cancer<- c(rep("glioblastoma", 3), rep("liver hepatocellular \ncarcinoma", 3), rep("lung \nadenocarcinoma", 3), rep("pancreatic \nadenocarcinoma", 3))
comp <- rep( c("DESeq2 vs. \nlimma", "DESeq2 vs \nTransfer Learning", "limma vs \nTransfer Learning"), 8)
lincs_result_compare <- data.frame(NCS_sp, FDR_sp, NCS_test, FDR_test, cancer, comp)
lincs_result_compare_longer<- pivot_longer(lincs_result_compare, cols = c(NCS_sp, FDR_sp), values_to = "spearman", names_to = "metric_1")
lincs_result_compare_longer2<- pivot_longer(lincs_result_compare_longer, cols = c(NCS_test, FDR_test), values_to = "Significant", names_to = "metric_2")
lincs_result_compare_longer2$metric <- strtrim(lincs_result_compare_longer2$metric_1, 3)
lincs_result_compare_longer2$symbol <- ifelse(lincs_result_compare_longer2$Significant == TRUE, "*", "")
ggplot(lincs_result_compare_longer2, aes(x=metric, y= spearman, fill=metric)) + geom_bar(stat="identity", position=position_dodge(), color = "black")+ ylab("Spearman Coefficient") + xlab("Metric") +theme(axis.text.x=element_blank(),axis.ticks.x=element_blank()) +facet_grid( comp ~ cancer) + scale_fill_viridis_d(option= "E")+theme(text = element_text(size = 20, face="bold"))
Location of final scripts:
scripts
Location of data produced:
output
Dates when operations were done:
220920
sessionInfo()
## R version 4.1.3 (2022-03-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] forcats_0.5.2 stringr_1.4.1
## [3] dplyr_1.0.10 purrr_0.3.4
## [5] readr_2.1.2 tidyr_1.2.1
## [7] tibble_3.1.8 tidyverse_1.3.2
## [9] viridis_0.6.2 viridisLite_0.4.1
## [11] recount3_1.4.0 SummarizedExperiment_1.24.0
## [13] Biobase_2.54.0 GenomicRanges_1.46.1
## [15] GenomeInfoDb_1.30.1 IRanges_2.28.0
## [17] S4Vectors_0.32.4 BiocGenerics_0.40.0
## [19] MatrixGenerics_1.6.0 matrixStats_0.62.0
## [21] ggpubr_0.4.0 cowplot_1.1.1
## [23] ggplot2_3.3.6
##
## loaded via a namespace (and not attached):
## [1] googledrive_2.0.0 colorspace_2.0-3 ggsignif_0.6.3
## [4] rjson_0.2.21 ellipsis_0.3.2 XVector_0.34.0
## [7] fs_1.5.2 rstudioapi_0.13 farver_2.1.1
## [10] bit64_4.0.5 fansi_1.0.3 lubridate_1.8.0
## [13] xml2_1.3.3 R.methodsS3_1.8.2 cachem_1.0.6
## [16] knitr_1.40 jsonlite_1.8.0 Rsamtools_2.10.0
## [19] broom_1.0.1 dbplyr_2.2.1 R.oo_1.25.0
## [22] compiler_4.1.3 httr_1.4.4 backports_1.4.1
## [25] assertthat_0.2.1 Matrix_1.5-1 fastmap_1.1.0
## [28] gargle_1.2.1 cli_3.4.1 htmltools_0.5.3
## [31] tools_4.1.3 gtable_0.3.1 glue_1.6.2
## [34] GenomeInfoDbData_1.2.7 rappdirs_0.3.3 Rcpp_1.0.9
## [37] carData_3.0-5 cellranger_1.1.0 jquerylib_0.1.4
## [40] vctrs_0.4.2 Biostrings_2.62.0 rtracklayer_1.54.0
## [43] xfun_0.33 rvest_1.0.3 lifecycle_1.0.2
## [46] restfulr_0.0.15 rstatix_0.7.0 XML_3.99-0.10
## [49] googlesheets4_1.0.1 zlibbioc_1.40.0 scales_1.2.1
## [52] vroom_1.5.7 hms_1.1.2 parallel_4.1.3
## [55] yaml_2.3.5 curl_4.3.2 memoise_2.0.1
## [58] gridExtra_2.3 sass_0.4.2 stringi_1.7.8
## [61] RSQLite_2.2.17 highr_0.9 BiocIO_1.4.0
## [64] filelock_1.0.2 BiocParallel_1.28.3 rlang_1.0.6
## [67] pkgconfig_2.0.3 bitops_1.0-7 evaluate_0.16
## [70] lattice_0.20-45 labeling_0.4.2 GenomicAlignments_1.30.0
## [73] bit_4.0.4 tidyselect_1.1.2 magrittr_2.0.3
## [76] R6_2.5.1 generics_0.1.3 DelayedArray_0.20.0
## [79] DBI_1.1.3 pillar_1.8.1 haven_2.5.1
## [82] withr_2.5.0 abind_1.4-5 RCurl_1.98-1.8
## [85] modelr_0.1.9 crayon_1.5.2 car_3.1-0
## [88] utf8_1.2.2 BiocFileCache_2.2.1 tzdb_0.3.0
## [91] rmarkdown_2.16 readxl_1.4.1 grid_4.1.3
## [94] data.table_1.14.2 blob_1.2.3 reprex_2.0.2
## [97] digest_0.6.29 R.utils_2.12.0 munsell_0.5.0
## [100] bslib_0.4.0 sessioninfo_1.2.2